#!/usr/bin/env python3
"""Produce a diff between mypy caches.

With some infrastructure, this can allow for distributing small cache diffs to users in
many cases instead of full cache artifacts.
"""

import argparse
import json
import os
import sys

from collections import defaultdict
from typing import Any, Dict, Optional, Set

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from mypy.metastore import FilesystemMetadataStore, MetadataStore, SqliteMetadataStore


def make_cache(input_dir: str, sqlite: bool) -> MetadataStore:
    """Open the metadata store rooted at *input_dir*.

    A sqlite-backed store is returned when *sqlite* is true, otherwise a
    plain filesystem-backed one.
    """
    store_cls = SqliteMetadataStore if sqlite else FilesystemMetadataStore
    return store_cls(input_dir)


def merge_deps(all: Dict[str, Set[str]], new: Dict[str, Set[str]]) -> None:
    """Merge the dependency sets from *new* into *all*, in place."""
    for key, targets in new.items():
        if key not in all:
            all[key] = set()
        all[key] |= targets


def load(cache: "MetadataStore", s: str) -> Any:
    """Read and decode cache entry *s*, normalized for comparison.

    Meta files get their mtimes zeroed and their dependency lists sorted
    (with dep_prios/dep_lines permuted to match); deps files get each
    dependency list sorted. This keeps semantically-equal entries from
    comparing unequal for incidental reasons.
    """
    obj = json.loads(cache.read(s))
    if s.endswith(".meta.json"):
        # Zero out the mtimes and sort the dependencies to avoid
        # spurious conflicts.
        obj["mtime"] = 0
        obj["data_mtime"] = 0
        if "dependencies" in obj:
            num_deps = len(obj["dependencies"])
            # Triples of (module, priority, line) covering both the real and
            # the suppressed dependencies, in their original order.
            triples = list(
                zip(
                    obj["dependencies"] + obj["suppressed"],
                    obj["dep_prios"],
                    obj["dep_lines"],
                )
            )

            def split3(entries: Any) -> Any:
                # Transpose [(mod, prio, line), ...] into three tuples.
                return zip(*entries) if entries else ((), (), ())

            obj["dependencies"], dep_prios, dep_lines = split3(sorted(triples[:num_deps]))
            obj["suppressed"], sup_prios, sup_lines = split3(sorted(triples[num_deps:]))
            obj["dep_prios"] = dep_prios + sup_prios
            obj["dep_lines"] = dep_lines + sup_lines
    elif s.endswith(".deps.json"):
        # For deps files, sort the deps to avoid spurious mismatches.
        for targets in obj.values():
            targets.sort()
    return obj


def main() -> None:
    """Diff two mypy cache directories and write out a JSON patch.

    The patch maps cache file names to their new serialized contents
    (``None`` meaning the file was deleted), suitable for turning the first
    cache into the second.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--verbose", action="store_true", default=False, help="Increase verbosity"
    )
    parser.add_argument(
        "--sqlite", action="store_true", default=False, help="Use a sqlite cache"
    )
    parser.add_argument("input_dir1", help="Input directory for the cache")
    parser.add_argument("input_dir2", help="Input directory for the cache")
    parser.add_argument("output", help="Output file")
    args = parser.parse_args()

    cache1 = make_cache(args.input_dir1, args.sqlite)
    cache2 = make_cache(args.input_dir2, args.sqlite)

    # Per-file-type hit/miss counters, reported under --verbose.
    type_misses: Dict[str, int] = defaultdict(int)
    type_hits: Dict[str, int] = defaultdict(int)

    # Cache file name -> new serialized contents, or None for a deletion.
    updates: Dict[str, Optional[str]] = {}

    # Fine-grained dependencies accumulated from every changed .deps.json
    # file: deps1 from the old cache, deps2 from the new one.
    deps1: Dict[str, Set[str]] = {}
    deps2: Dict[str, Set[str]] = {}

    misses = hits = 0
    cache1_all = list(cache1.list_all())
    for s in cache1_all:
        obj1 = load(cache1, s)
        try:
            obj2 = load(cache2, s)
        except FileNotFoundError:
            obj2 = None  # file was deleted in the new cache

        typ = s.split(".")[-2]
        if obj1 != obj2:
            misses += 1
            type_misses[typ] += 1

            # Collect the dependencies instead of including them directly in the diff
            # so we can produce a much smaller direct diff of them.
            if ".deps." not in s:
                if obj2 is not None:
                    updates[s] = json.dumps(obj2)
                else:
                    updates[s] = None
            elif obj2:
                merge_deps(deps1, obj1)
                merge_deps(deps2, obj2)
        else:
            hits += 1
            type_hits[typ] += 1

    # Files present only in the new cache are included verbatim.
    cache1_all_set = set(cache1_all)
    for s in cache2.list_all():
        if s not in cache1_all_set:
            updates[s] = cache2.read(s)

    # Compute what deps have been added and merge them all into the
    # @root deps file.
    # BUG FIX: "added" deps are those in the new cache (deps2) that are
    # missing from the old one (deps1). The operands were reversed
    # (deps1 - deps2), which computed the *removed* deps instead, so real
    # additions were silently dropped from the diff.
    new_deps = {k: deps2.get(k, set()) - deps1.get(k, set()) for k in deps2}
    new_deps = {k: v for k, v in new_deps.items() if v}
    try:
        root_deps = load(cache1, "@root.deps.json")
    except FileNotFoundError:
        root_deps = {}
    merge_deps(new_deps, root_deps)

    new_deps_json = {k: list(v) for k, v in new_deps.items() if v}
    updates["@root.deps.json"] = json.dumps(new_deps_json)

    # Drop updates to deps.meta.json for size reasons. The diff
    # applier will manually fix it up.
    updates.pop("./@deps.meta.json", None)
    updates.pop("@deps.meta.json", None)

    print("Generated incremental cache:", hits, "hits,", misses, "misses")
    if args.verbose:
        print("hits", type_hits)
        print("misses", type_misses)

    with open(args.output, "w") as f:
        json.dump(updates, f)


# Run as a script; importing this module has no side effects beyond the
# sys.path tweak above.
if __name__ == "__main__":
    main()
